if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  rdrobust,
  tidyverse,
  readxl,
  kableExtra,
  haven,
  labelled,
  conflicted,
  furrr,
  future
)

conflicts_prefer(dplyr::select, dplyr::filter, dplyr::mutate, dplyr::arrange, dplyr::between)

# Source helper function to tidy rdrobust results
source("code/helper_function.R")

# Civey data -------------------------------------------------------------

civey <- read_rds("data_not_upload/civey_data_clean.rds") %>%
  filter(!is.na(yob)) %>%
  filter(yob > 1945)

# Men -------------------------------------------------------------------------

civey_m <- civey %>%
  filter(male_bin == 1 & east_bin == 1)
civey_f <- civey %>%
  filter(male_bin == 0 & east_bin == 1)

m1 <- rdrobust(
  y = civey_m %>% pull(gdr_dictatorship_bin),
  x = civey_m$yob,
  c = 1971,
  p = 1,
  deriv = 1,
  scalepar = -1
)
m2 <- rdrobust(
  y = civey_f %>% pull(gdr_dictatorship_bin),
  x = civey_f$yob,
  c = 1971,
  p = 1,
  deriv = 1,
  scalepar = -1
)

# West Germany -------------------------------------------------------------------------

civey_m <- civey %>%
  filter(male_bin == 1 & east_bin == 0)
civey_f <- civey %>%
  filter(male_bin == 0 & east_bin == 0)

m1_west <- rdrobust(
  y = civey_m %>% pull(gdr_dictatorship_bin),
  x = civey_m$yob,
  c = 1971,
  p = 1,
  deriv = 1,
  scalepar = -1
)
m2_west <- rdrobust(
  y = civey_f %>% pull(gdr_dictatorship_bin),
  x = civey_f$yob,
  c = 1971,
  p = 1,
  deriv = 1,
  scalepar = -1
)

# Combine results -------------------------------------------------------------------------

res_civey <- list(m1, m2, m1_west, m2_west) %>%
  lapply(tidy_rd, se_nr = 3) %>%
  reduce(bind_rows) %>%
  mutate(
    gender = rep(c("male", "female"), 2),
    sample = rep(c("east", "west"), each = 2)
  )

res_civey %>%
  dplyr::select(gender, sample, estimate, conf.low, conf.high, n, bw_left_h) %>%
  mutate(across(-c(gender, sample, n, bw_left_h), ~ . * 100))

bw_h_male_east <- res_civey %>%
  filter(gender == "male" & sample == "east") %>%
  pull(bw_left_h)

bw_h_female_east <- res_civey %>%
  filter(gender == "female" & sample == "east") %>%
  pull(bw_left_h)

bw_b_male_east <- res_civey %>%
  filter(gender == "male" & sample == "east") %>%
  pull(bw_right_b)

bw_b_female_east <- res_civey %>%
  filter(gender == "female" & sample == "east") %>%
  pull(bw_right_b)

# P values for difference in coefficients between men and women in east ------------------

est_m_east <- res_civey %>%
  filter(gender == "male" & sample == "east") %>%
  pull(estimate)

est_f_east <- res_civey %>%
  filter(gender == "female" & sample == "east") %>%
  pull(estimate)

se_m_east <- res_civey %>%
  filter(gender == "male" & sample == "east") %>%
  pull(std.error)

se_f_east <- res_civey %>%
  filter(gender == "female" & sample == "east") %>%
  pull(std.error)

se_diff_civey <- sqrt(se_m_east^2 + se_f_east^2)

# Calculate p-value for difference in coefficients between men and women in east ------------------

est_diff_civey <- est_m_east - est_f_east
tstat_diff_civey <- est_diff_civey / se_diff_civey
pval_diff_civey <- 2 * pnorm(tstat_diff_civey)

cat("P.val for diff between men and women in East: ", pval_diff_civey)

# Check results

res_civey %>%
  dplyr::select(gender, sample, estimate, conf.low, conf.high, n) %>%
  mutate(across(-c(gender, sample, n), ~ . * 100)) %>%
  filter(sample == "east")

# Check number of respondents by year

civey_f %>%
  count(yob) %>%
  arrange(yob) %>%
  filter(between(yob, 1970, 1980))

civey_m %>%
  count(yob) %>%
  arrange(yob) %>%
  filter(between(yob, 1970, 1980))

# Cont. w/ forsa data --------------------------------------------------------------------

rm(list = ls()[!ls() %in%
  c("m1", "m2", "m1_west", "m2_west", "res_civey", "res_civey_donut")])

source("code/helper_function.R")

## Load data

forsa <- read_rds("data/forsa.rds") %>%
  haven::zap_labels() %>%
  filter(!is.na(vote_afd)) %>%
  filter(!is.na(yob)) %>%
  filter(as.numeric(yob) > 1945) %>%
  filter(year > 2016) %>%
  mutate(vote_voter = 1 - vote_nonvoter)

# Generate datasets

forsa_m_east <- forsa %>%
  filter(male == 1 & east == 1)
forsa_m_west <- forsa %>%
  filter(male == 1 & east == 0)
forsa_w_east <- forsa %>%
  filter(male == 0 & east == 1)
forsa_w_west <- forsa %>%
  filter(male == 0 & east == 0)

# Estimate

m1 <- rdrobust(
  y = forsa_m_east$vote_afd,
  x = forsa_m_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)

m2 <- rdrobust(
  y = forsa_w_east$vote_afd,
  x = forsa_w_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)

m3 <- rdrobust(
  y = forsa_m_west$vote_afd,
  x = forsa_m_west$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)

m4 <- rdrobust(
  y = forsa_w_west$vote_afd,
  x = forsa_w_west$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)

res_vote <- list(m1, m2, m3, m4) %>%
  lapply(tidy_rd, se_nr = 3) %>%
  reduce(bind_rows) %>%
  mutate(
    gender = rep(c("male", "female"), 2),
    sample = rep(c("east", "west"), each = 2)
  )
res_vote %>% dplyr::select(gender, sample, n)

# Se, tstat, pval for the difference in vote afd between men and women in east

est_m_east <- res_vote %>%
  filter(gender == "male" & sample == "east") %>%
  pull(estimate)

est_f_east <- res_vote %>%
  filter(gender == "female" & sample == "east") %>%
  pull(estimate)

se_m_east <- res_vote %>%
  filter(gender == "male" & sample == "east") %>%
  pull(std.error)

se_f_east <- res_vote %>%
  filter(gender == "female" & sample == "east") %>%
  pull(std.error)

se_diff_vote_east_forsa <- sqrt(se_m_east^2 + se_f_east^2)

est_diff_vote_east_forsa <- est_m_east - est_f_east

tstat_diff_vote_east_forsa <- est_diff_vote_east_forsa / se_diff_vote_east_forsa

pval_diff_vote_east_forsa <- 2 * (1 - pnorm(abs(tstat_diff_vote_east_forsa)))

cat("P.val for diff between men and women in East (Forsa): ", pval_diff_vote_east_forsa)

# Convert to table

tab_full <- bind_rows(res_civey, res_vote) %>%
  mutate(what = rep(c("civey", "vote"), each = 4)) %>%
  mutate(label_proper = rep(c(
    "GDR was a dictatorship",
    "AfD support"
  ), each = 4)) %>%
  mutate(gender = ifelse(gender == "male", "Men", "Women"))

#

tab_full <- tab_full %>%
  mutate_at(vars(estimate, matches("conf|estimate")), ~ . * 100) %>%
  mutate_at(vars(
    estimate, matches("conf"),
    p.value
  ), list(~ round(., 2))) %>%
  mutate(ci = paste0("[", conf.low, ", ", conf.high, "]")) %>%
  dplyr::select(
    sample, label_proper, gender, estimate,
    ci, p.value, bw_left_h,
    n
  ) %>%
  mutate(bw_left_h = round(bw_left_h, 0)) %>%
  mutate(n = format(n, nsmall = 1, big.mark = ",")) %>%
  mutate(label_proper = str_replace(label_proper, "\n", " "))

##

tab_final <- tab_full %>%
  filter(sample == "east") %>%
  slice(3:4, 1:2) %>%
  dplyr::select(-sample)
tab_final

# Table 2, rows 1-4: East Germany ----------------------------------------------

kable(tab_final, "latex",
  longtable = F,
  booktabs = T, col.names = c(
    "Outcome",
    "Sample",
    "$\\hat{\\tau}_{\\text{SRK}}$",
    "95\\% CI",
    "P-val",
    "$h_{\\text{MSE}}$",
    "$n$"
  ),
  linesep = "", caption = "Main RDD results",
  label = "tab:main_appendix",
  escape = F
) %>%
  kable_styling(
    latex_options = c("repeat_header"),
    font_size = 11
  ) %>%
  # collapse_rows() %>%
  row_spec(0, bold = T) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  pack_rows(group_label = "Voting", start_row = 1, end_row = 2) %>%
  pack_rows(group_label = "Attitudinal outcomes", start_row = 3, end_row = 4)

# Table 3, rows 1-4: West Germany ----------------------------------------------

tab_final <- tab_full %>%
  filter(sample == "west") %>%
  slice(3:4, 1:2) %>%
  dplyr::select(-sample)
tab_final

# Output as table

kable(tab_final, "latex",
  longtable = F,
  booktabs = T, col.names = c(
    "Outcome",
    "Sample",
    "$\\hat{\\tau}_{\\text{SRK}}$",
    "95\\% CI",
    "P-val",
    "$h_{\\text{MSE}}$",
    "$n$"
  ),
  linesep = "", caption = "Main RDD results",
  label = "tab:main_appendix",
  escape = F
) %>%
  kable_styling(
    latex_options = c("repeat_header"),
    font_size = 11
  ) %>%
  # collapse_rows() %>%
  row_spec(0, bold = T) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  pack_rows(group_label = "Voting", start_row = 1, end_row = 2) %>%
  pack_rows(group_label = "Attitudinal outcomes", start_row = 3, end_row = 4)

# Forsa: work w/ pre-treatment covariates ----------------------------------------------

glimpse(forsa)

# Define a mapping from covariate names to proper names
covar_mapping <- tibble::tibble(
  covariate = c(
    "male", "educ_low",
    "educ_high", "educ_med", "unemployed",
    "abitur", "married_now", "ever_married", "religion_bin"
  ),
  covar_proper = c(
    "Male", "Low education",
    "High education", "Medium education", "Unemployed",
    "High school graduate\n(Abitur)", "Currently married", "Ever married", "Any religion"
  ),
  order = c(1, 2, 4, 3, 5, 6, 7, 8, 9)
) %>%
  mutate(covar_proper = factor(covar_proper, levels = covar_proper)) %>%
  mutate(covar_proper = fct_reorder(covar_proper, order))

# List of covariates to check for balance
covar_list <- covar_mapping$covariate

# Quick summary of covariates in the Forsa data
summary(forsa[covar_list])

# Remove any value labels from Forsa data
forsa <- zap_labels(forsa)

# Subset Forsa data for East Germany and by gender
forsa_east <- forsa %>% filter(east == 1)
forsa_east_m <- forsa_east %>% filter(male == 1) # Men in East
forsa_east_f <- forsa_east %>% filter(male == 0) # Women in East

plan("future::multisession")

# Loop over each covariate in covar_list using furrr and obtain the rdrobust results
covar_results <- furrr::future_map_dfr(covar_list, function(covar) {
  if (covar == "male") {
    # Eastern sample estimation for the covariate
    rd_east <- rdrobust(
      y = as.numeric(forsa_east[[covar]]),
      x = as.numeric(forsa_east$yob),
      c = 1971,
      p = 1
    )

    tidy_final <- tidy_rd(rd_east, se_nr = 3) %>%
      dplyr::mutate(covariate = covar, sample = "All respondents")
  } else {
    rd_east_m <- rdrobust(
      y = as.numeric(forsa_east_m[[covar]]),
      x = as.numeric(forsa_east_m$yob),
      c = 1971,
      p = 1
    )
    rd_east_f <- rdrobust(
      y = as.numeric(forsa_east_f[[covar]]),
      x = as.numeric(forsa_east_f$yob),
      c = 1971,
      p = 1
    )

    tidy_east_m <- tidy_rd(rd_east_m, se_nr = 3) %>%
      dplyr::mutate(covariate = covar, sample = "Men")
    tidy_east_f <- tidy_rd(rd_east_f, se_nr = 3) %>%
      dplyr::mutate(covariate = covar, sample = "Women")

    tidy_final <- dplyr::bind_rows(tidy_east_m, tidy_east_f)
  }

  tidy_final
}, .progress = TRUE)

# Add proper covariate names by joining with our mapping table
covar_results <- covar_results %>%
  dplyr::left_join(covar_mapping, by = "covariate")

# Figure A.8 -- Covariate balance ----------------------------------------------

# Create a plot: points with error bars showing the RdRobust estimate and CI by covariate across regions
covar_plot <- ggplot(covar_results, aes(x = estimate, y = reorder(covar_proper, order), color = sample, fill = sample)) +
  geom_vline(xintercept = 0, linetype = "dotted", color = "gray50") +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high),
    width = 0.0,
    position = position_dodge(width = 0.5)
  ) +
  labs(
    x = "RD estimate",
    y = "Covariate (all binary)",
    title = NULL,
    color = NULL,
    fill = NULL
  ) +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_grey(start = 0.2, end = 0.8) +
  scale_fill_grey(start = 0.2, end = 0.8)

# Print the plot
print(covar_plot)

# Forsa: left party and non-voting ------------------------------------------------

m1 <- rdrobust(
  y = forsa_m_east$vote_voter,
  x = forsa_m_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)
m2 <- rdrobust(
  y = forsa_w_east$vote_voter,
  x = forsa_w_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)
m3 <- rdrobust(
  y = forsa_m_east$vote_left,
  x = forsa_m_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)
m4 <- rdrobust(
  y = forsa_w_east$vote_left,
  x = forsa_w_east$yob,
  c = 1971,
  deriv = 1,
  scalepar = -1,
  p = 1
)

res_vote <- list(m1, m2, m3, m4) %>%
  lapply(tidy_rd, se_nr = 3) %>%
  reduce(bind_rows) %>%
  mutate(
    gender = rep(c("male", "female"), 2),
    sample = c(rep(c("east"), each = 4))
  ) %>%
  mutate(outcome = c(rep(c("turnout", "left_party_support"), each = 2)))

# Check

res_vote %>% dplyr::select(estimate, p.value, gender, sample, outcome)

##

tab_full <- bind_rows(res_vote) %>%
  mutate(label_proper = rep(c(
    "Turnout",
    "Left party support"
  ), each = 2)) %>%
  mutate(gender = ifelse(gender == "male", "Men", "Women"))

## Table

tab_full <- tab_full %>%
  mutate_at(vars(estimate, matches("conf|estimate")), ~ . * 100) %>%
  mutate_at(vars(
    estimate, matches("conf"),
    p.value
  ), list(~ round(., 2))) %>%
  mutate(ci = paste0("[", conf.low, ", ", conf.high, "]")) %>%
  dplyr::select(
    sample, label_proper, gender, estimate,
    ci, bw_left_h,
    n, p.value
  ) %>%
  mutate(bw_left_h = round(bw_left_h, 0)) %>%
  mutate(n = format(n, nsmall = 1, big.mark = ",")) %>%
  mutate(label_proper = str_replace(label_proper, "\n", " "))

## Add stars

add_stars_to_estimate2 <- function(df) {
  # Define the function to get star according to p-value
  get_star <- function(p) {
    if (p < 0.001) {
      return("***")
    } else if (p < 0.01) {
      return("**")
    } else if (p < 0.05) {
      return("*")
    } else {
      return(NA)
    }
  }

  # Apply the function to each p-value in the data frame
  stars <- sapply(df$p.value, get_star)

  # Append stars to estimate
  df$estimate <- ifelse(is.na(stars), df$estimate, paste(df$estimate, "$^", stars, "$", sep = ""))

  return(df)
}

# Apply the function
tab_full <- add_stars_to_estimate2(tab_full)

## Rem sample and pval

tab_final <- tab_full %>%
  dplyr::select(-sample, -p.value)
tab_final

# Table A.3: Turnout and left party support ----------------------------------------------

kable(tab_final, "latex",
  longtable = F,
  booktabs = T, col.names = c(
    "Outcome",
    "Sample",
    "$\\hat{\\tau}_{\\text{SRK}}$",
    "95\\% CI",
    "$h_{\\text{MSE}}$",
    "$n$"
  ),
  linesep = "", caption = "Main RDD results",
  label = "tab:turnout_gender",
  escape = F
) %>%
  kable_styling(
    latex_options = c("repeat_header"),
    font_size = 11
  ) %>%
  # collapse_rows() %>%
  row_spec(0, bold = T) %>%
  kable_styling(latex_options = "HOLD_position")
